res=GET('https://developer.nps.gov/api/v1/parks?limit=500&api_key=B9nDpbkbrb3kSOjz6kXSxMJ3d6MSpUvt1QqYdeyn')
data = res %>% content("text") %>% jsonlite::fromJSON() %>% as_tibble()
NPS_data=data %>% unnest(data) %>% select(fullName,latitude,longitude,topics, activities,states, parkCode) %>% janitor::clean_names() %>%
mutate(
latitude = as.numeric(latitude),
longitude = as.numeric(longitude)
) %>% unnest(activities, names_sep = "_") %>%
unnest(topics, names_sep = "_")
visitation_data <-
read_csv("data/Query Builder for Public Use Statistics (1979 - Last Calendar Year).csv") %>%
janitor::clean_names() %>%
mutate(unit_code = tolower(unit_code)) %>%
rename(park_code = unit_code,
full_name = park_name) %>%
select(full_name, park_code, park_type, region, state, year, month, recreation_visits, tent_campers, rv_campers, tent_campers, backcountry)
## Rows: 101419 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): ParkName, UnitCode, ParkType, Region, State, ParkNameTotal, UnitCo...
## dbl (3): Year, Month, YearTotal
## num (22): RecreationVisits, NonRecreationVisits, RecreationHours, NonRecreat...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
combined_data <- full_join(NPS_data, visitation_data, by = c("park_code"))
## Warning in full_join(NPS_data, visitation_data, by = c("park_code")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
faceted plot with rv, tent, and backcountry visits
visitation_data %>%
group_by(park_type, month) %>%
summarize(total_visitation = sum(recreation_visits)) %>%
plot_ly(x = ~as.factor(month), y = ~total_visitation, type = "scatter", mode = "lines", color = ~park_type)
## `summarise()` has grouped output by 'park_type'. You can override using the
## `.groups` argument.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
visit_summary <-
visitation_data %>%
group_by(park_type, month) %>%
summarize(total_tent = sum(tent_campers),
total_backcountry = sum(backcountry),
total_rv = sum(rv_campers))
## `summarise()` has grouped output by 'park_type'. You can override using the
## `.groups` argument.
tent_campers_plot <- plot_ly(visit_summary,
x = ~as.factor(month),
y = ~total_tent,
type = "scatter",
mode = "lines",
color = ~park_type,
legendgroup = ~park_type) %>%
layout(title = "Tent Campers")
backcountry_plot <- plot_ly(visit_summary,
x = ~as.factor(month),
y = ~total_backcountry,
type = "scatter",
mode = "lines",
color = ~park_type,
legendgroup = ~park_type,
showlegend = FALSE) %>%
layout(title = "Backcountry")
rv_campers_plot <- plot_ly(visit_summary,
x = ~as.factor(month),
y = ~total_rv,
type = "scatter",
mode = "lines",
color = ~park_type,
legendgroup = ~park_type,
showlegend = FALSE) %>%
layout(title = "RV Campers")
subplot(
tent_campers_plot,
backcountry_plot,
rv_campers_plot,
nrows = 2,
shareX = TRUE,
shareY = TRUE
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
visits by season boxplot
visitation_data %>%
mutate(season = case_when(
month %in% c(12,1,2) ~ "Winter",
month %in% c(3,4,5) ~ "Spring",
month %in% c(6,7,8) ~ "Summer",
TRUE ~ "Fall"
)) %>%
plot_ly(x = ~season, y = ~recreation_visits, type = "box")
total visitation by year
visitation_data %>%
group_by(park_type, year) %>%
summarize(total_visitation = sum(recreation_visits)) %>%
ggplot(aes(x = as.factor(year), y = total_visitation, color = park_type, group = park_type)) +
geom_line(linewidth = 0.7) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
## `summarise()` has grouped output by 'park_type'. You can override using the
## `.groups` argument.

total visitation by season
visitation_data %>%
mutate(season = case_when(
month %in% c(12,1,2) ~ "Winter",
month %in% c(3,4,5) ~ "Spring",
month %in% c(6,7,8) ~ "Summer",
TRUE ~ "Fall"
)) %>%
group_by(season) %>%
summarize(total_tent = sum(tent_campers),
backcountry_visits = sum(backcountry),
total_rv = sum(rv_campers)) %>%
pivot_longer(
total_tent:total_rv,
values_to = "total_visit",
names_to = "type_visit",
names_prefix = "total_"
) %>%
plot_ly(x = ~season, y = ~total_visit, color = ~type_visit, type = "bar")
mean visitation by park type and season
visitation_data %>%
mutate(season = case_when(
month %in% c(12,1,2) ~ "Winter",
month %in% c(3,4,5) ~ "Spring",
month %in% c(6,7,8) ~ "Summer",
TRUE ~ "Fall"
)) %>%
group_by(park_type, season) %>%
mutate(
mean_total = mean(recreation_visits, na.rm = TRUE)
) %>%
plot_ly(x = ~season, y = ~mean_total, color = ~park_type)
## No trace type specified:
## Based on info supplied, a 'bar' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#bar
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
mean visitation season and visit type
visitation_data %>%
mutate(season = case_when(
month %in% c(12,1,2) ~ "Winter",
month %in% c(3,4,5) ~ "Spring",
month %in% c(6,7,8) ~ "Summer",
TRUE ~ "Fall"
)) %>%
group_by(season) %>%
summarize(mean_tent = mean(tent_campers),
mean_backcountry = mean(backcountry),
mean_rv = mean(rv_campers)) %>%
pivot_longer(
mean_tent:mean_rv,
names_to = "type_visit",
values_to = "mean",
names_prefix = "mean_"
) %>%
plot_ly(x = ~season, y = ~mean, color = ~type_visit)
## No trace type specified:
## Based on info supplied, a 'bar' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#bar
map of all national parks in US
leaflet(data = NPS_data) %>%
addTiles() %>%
addMarkers(~longitude, ~latitude) %>%
setView(lng = -98.583, lat = 39.828, zoom = 4)
Mean recreation visit by park type (separate plots for each park type)
park_types <- unique(visitation_data$park_type)
plots <- list()
for (i in seq_along(park_types)) {
plots[[i]] <- visitation_data %>%
filter(park_type == park_types[i]) %>%
mutate(season = case_when(
month %in% c(12, 1, 2) ~ "Winter",
month %in% c(3, 4, 5) ~ "Spring",
month %in% c(6, 7, 8) ~ "Summer",
TRUE ~ "Fall"
)) %>%
group_by(season) %>%
summarize(avg_visits = mean(recreation_visits, na.rm = TRUE)) %>%
ggplot(aes(x = season, y = avg_visits, fill = season)) +
geom_col() +
ggtitle(paste("Average Visits by Season for", park_types[i]))
}
plots
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

##
## [[11]]

##
## [[12]]

##
## [[13]]

##
## [[14]]

##
## [[15]]

##
## [[16]]

##
## [[17]]

##
## [[18]]

National Preserves has pretty similar avg visits across seasons
combined_data %>%
filter(park_type == "National Preserve") %>%
group_by(activities_name) %>% distinct(activities_name) %>% knitr::kable()
| activities_name |
|---|
| Arts and Culture |
| Cultural Demonstrations |
| Auto and ATV |
| ATV Off-Roading |
| Scenic Driving |
| Astronomy |
| Stargazing |
| Biking |
| Road Biking |
| Boating |
| Boat Tour |
| Camping |
| Backcountry Camping |
| Car or Front Country Camping |
| Group Camping |
| RV Camping |
| Food |
| Picnicking |
| Guided Tours |
| Bus/Shuttle Guided Tour |
| Hiking |
| Backcountry Hiking |
| Front-Country Hiking |
| Hunting and Gathering |
| Hunting |
| Paddling |
| Canoeing |
| Kayaking |
| Junior Ranger Program |
| Wildlife Watching |
| Birdwatching |
| Park Film |
| Museum Exhibits |
| Shopping |
| Bookstore and Park Store |
| Canoe or Kayak Camping |
| Fishing |
| Hands-On |
| Citizen Science |
| Gift Shop and Souvenirs |
| Mountain Biking |
| Climbing |
| Rock Climbing |
| Freshwater Fishing |
| Fly Fishing |
| Horse Trekking |
| Horseback Riding |
| Swimming |
| Freshwater Swimming |
| Auto Off-Roading |
| Horse Camping (see also Horse/Stock Use) |
| Off-Trail Permitted Hiking |
| Horse Camping (see also camping) |
| Self-Guided Tours - Walking |
| Living History |
| Motorized Boating |
| Self-Guided Tours - Auto |
| Historic Weapons Demonstration |
| Stand Up Paddleboarding |
| Saltwater Swimming |
| Skiing |
| Cross-Country Skiing |
| Snowshoeing |
National Preserves has skiing, cross country skiing and snowshoeing, which might be the reason why there is a higher average visit in winter than any other park type.
Average visitation by type and park_type (separate graph for each park_type)
park_types <- unique(visitation_data$park_type)
plots <- list()
for (i in seq_along(park_types)) {
visit_summary <- visitation_data %>%
filter(park_type == park_types[i]) %>%
mutate(season = case_when(
month %in% c(12, 1, 2) ~ "Winter",
month %in% c(3, 4, 5) ~ "Spring",
month %in% c(6, 7, 8) ~ "Summer",
TRUE ~ "Fall"
)) %>%
group_by(season) %>%
summarize(
mean_tent = mean(tent_campers, na.rm = TRUE),
mean_backcountry = mean(backcountry, na.rm = TRUE),
mean_rv = mean(rv_campers, na.rm = TRUE)
) %>%
pivot_longer(
cols = starts_with("mean_"),
names_to = "type_visit",
values_to = "mean",
names_prefix = "mean_"
)
plots[[i]] <- ggplot(visit_summary, aes(x = season, y = mean)) +
geom_col() + facet_grid(~type_visit) +
ggtitle(paste("Average Visits by Season for", park_types[i]))
}
plots
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

##
## [[11]]

##
## [[12]]

##
## [[13]]

##
## [[14]]

##
## [[15]]

##
## [[16]]

##
## [[17]]

##
## [[18]]

Regional Comparisons
region_data <- visitation_data %>%
mutate(region = case_when(
state %in% c("CT", "RI", "NH", "VT", "NJ", "NY", "PA", "MD", "ME", "MA") ~ "northeast",
state %in% c("IL","IN", "MI", "OH", "WI", "IA", "KS", "MN", "MO", "NE", "ND", "SD") ~ "midwest",
state %in% c("FL", "GA", "NC", "SC", "VA", "DE", "WV", "AL", "KY", "MS", "TN", "AR", "LA", "OK", "TX", "DC") ~ "south",
state %in% c("AK", "CA", "HI", "OR", "WA", "AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY") ~ "west",
state %in% c("VI", "AS", "GU", "PR") ~ "u.s. territory",
TRUE ~ "no state data"
))
region_data <- combined_data %>%
mutate(region = case_when(
state %in% c("CT", "RI", "NH", "VT", "NJ", "NY", "PA", "MD", "ME", "MA") ~ "northeast",
state %in% c("IL","IN", "MI", "OH", "WI", "IA", "KS", "MN", "MO", "NE", "ND", "SD") ~ "midwest",
state %in% c("FL", "GA", "NC", "SC", "VA", "DE", "WV", "AL", "KY", "MS", "TN", "AR", "LA", "OK", "TX", "DC") ~ "south",
state %in% c("AK", "CA", "HI", "OR", "WA", "AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY") ~ "west",
state %in% c("VI", "AS", "GU", "PR") ~ "u.s. territory",
state == TRUE ~ "no state data"
)) %>% select(c(-full_name.y, -topics_id, -topics_name, -activities_id))
Northeast
region_data %>%
filter(region == "northeast") %>% drop_na() %>%
distinct(park_type) %>% knitr::kable()
| park_type |
|---|
| National Park |
| National Historical Park |
| National Monument |
| National Historic Site |
| National Battlefield |
| National Seashore |
| Park (Other) |
| National Recreation Area |
| National Memorial |
| National Military Park |
| International Historic Site |
| National Wild & Scenic River |
region_data %>%
filter(region == "northeast") %>% distinct(activities_name)
## # A tibble: 84 × 1
## activities_name
## <chr>
## 1 Arts and Culture
## 2 Cultural Demonstrations
## 3 Astronomy
## 4 Stargazing
## 5 Biking
## 6 Boating
## 7 Camping
## 8 Group Camping
## 9 Climbing
## 10 Rock Climbing
## # ℹ 74 more rows
region_data %>%
filter(region == "northeast") %>%
mutate(season = case_when(
month %in% c(12, 1, 2) ~ "Winter",
month %in% c(3, 4, 5) ~ "Spring",
month %in% c(6, 7, 8) ~ "Summer",
TRUE ~ "Fall"
)) %>%
group_by(season) %>%
summarize(
mean_tent = mean(tent_campers, na.rm = TRUE),
mean_backcountry = mean(backcountry, na.rm = TRUE),
mean_rv = mean(rv_campers, na.rm = TRUE)
) %>%
pivot_longer(
cols = starts_with("mean_"),
names_to = "type_visit",
values_to = "mean",
names_prefix = "mean_"
) %>% ggplot(aes(x = season, y = mean)) + geom_col() + facet_grid(~type_visit)

Midwest
region_data %>%
filter(region == "midwest") %>%
drop_na() %>%
distinct(park_type) %>% knitr::kable()
| park_type |
|---|
| National Monument |
| National Lakeshore |
| National Park |
| National Historical Park |
| National Historic Site |
| National Memorial |
| National River |
| National Wild & Scenic River |
| National Battlefield Park |
| National Preserve |
| National Battlefield |
region_data %>%
filter(region == "midwest") %>% distinct(activities_name)
## # A tibble: 84 × 1
## activities_name
## <chr>
## 1 Arts and Culture
## 2 Cultural Demonstrations
## 3 Astronomy
## 4 Stargazing
## 5 Food
## 6 Picnicking
## 7 Guided Tours
## 8 Self-Guided Tours - Walking
## 9 Hiking
## 10 Junior Ranger Program
## # ℹ 74 more rows
region_data %>%
filter(region == "midwest") %>%
mutate(season = case_when(
month %in% c(12, 1, 2) ~ "Winter",
month %in% c(3, 4, 5) ~ "Spring",
month %in% c(6, 7, 8) ~ "Summer",
TRUE ~ "Fall"
)) %>%
group_by(season) %>%
summarize(
mean_tent = mean(tent_campers, na.rm = TRUE),
mean_backcountry = mean(backcountry, na.rm = TRUE),
mean_rv = mean(rv_campers, na.rm = TRUE)
) %>%
pivot_longer(
cols = starts_with("mean_"),
names_to = "type_visit",
values_to = "mean",
names_prefix = "mean_"
) %>% ggplot(aes(x = season, y = mean)) + geom_col() + facet_grid(~type_visit)

South
region_data %>%
filter(region == "south") %>%
drop_na() %>%
distinct(park_type) %>%
knitr::kable()
| park_type |
|---|
| National Historical Park |
| National Monument |
| National Recreation Area |
| National Historic Site |
| National Memorial |
| National Park |
| National Preserve |
| National River |
| National Parkway |
| National Wild & Scenic River |
| National Seashore |
| National Military Park |
| National Battlefield |
| National Battlefield Park |
| Park (Other) |
region_data %>%
filter(region == "south") %>%
distinct(activities_name)
## # A tibble: 89 × 1
## activities_name
## <chr>
## 1 Astronomy
## 2 Stargazing
## 3 Food
## 4 Picnicking
## 5 Guided Tours
## 6 Self-Guided Tours - Walking
## 7 Hands-On
## 8 Junior Ranger Program
## 9 Wildlife Watching
## 10 Birdwatching
## # ℹ 79 more rows
region_data %>%
filter(region == "south") %>%
mutate(season = case_when(
month %in% c(12, 1, 2) ~ "Winter",
month %in% c(3, 4, 5) ~ "Spring",
month %in% c(6, 7, 8) ~ "Summer",
TRUE ~ "Fall"
)) %>%
group_by(season) %>%
summarize(
mean_tent = mean(tent_campers, na.rm = TRUE),
mean_backcountry = mean(backcountry, na.rm = TRUE),
mean_rv = mean(rv_campers, na.rm = TRUE)
) %>%
pivot_longer(
cols = starts_with("mean_"),
names_to = "type_visit",
values_to = "mean",
names_prefix = "mean_"
) %>% ggplot(aes(x = season, y = mean)) + geom_col() + facet_grid(~type_visit)

west
region_data %>% filter(region == "west") %>% drop_na() %>%
distinct(park_type) %>% knitr::kable()
| park_type |
|---|
| National Park |
| National Monument |
| National Historic Site |
| National Battlefield |
| National Recreation Area |
| National Historical Park |
| National Reserve |
| National Memorial |
| National Preserve |
| National Seashore |
region_data %>% filter(region == "west") %>% distinct(activities_name)
## # A tibble: 99 × 1
## activities_name
## <chr>
## 1 Arts and Culture
## 2 Astronomy
## 3 Stargazing
## 4 Biking
## 5 Camping
## 6 Backcountry Camping
## 7 Car or Front Country Camping
## 8 Group Camping
## 9 Canyoneering
## 10 Climbing
## # ℹ 89 more rows
region_data %>%
filter(region == "west") %>%
mutate(season = case_when(
month %in% c(12, 1, 2) ~ "Winter",
month %in% c(3, 4, 5) ~ "Spring",
month %in% c(6, 7, 8) ~ "Summer",
TRUE ~ "Fall"
)) %>%
group_by(season) %>%
summarize(
mean_tent = mean(tent_campers, na.rm = TRUE),
mean_backcountry = mean(backcountry, na.rm = TRUE),
mean_rv = mean(rv_campers, na.rm = TRUE)
) %>%
pivot_longer(
cols = starts_with("mean_"),
names_to = "type_visit",
values_to = "mean",
names_prefix = "mean_"
) %>% ggplot(aes(x = season, y = mean)) +
geom_col() +
facet_grid(~type_visit)

Comparing Regions
region_data %>% group_by(region) %>%
summarize(avg_visitation = mean(recreation_visits, na.rm = TRUE)) %>%
arrange(desc(avg_visitation))
## # A tibble: 6 × 2
## region avg_visitation
## <chr> <dbl>
## 1 west 149106.
## 2 south 95655.
## 3 northeast 83202.
## 4 midwest 44548.
## 5 u.s. territory 31868.
## 6 <NA> 4265.
region_long <- region_data %>%
pivot_longer(cols = c(recreation_visits, tent_campers, rv_campers, backcountry),
names_to = "visit_type",
values_to = "count") %>% view()
region_data %>%
group_by(region) %>%
summarize(
mean_tent = mean(tent_campers, na.rm = TRUE),
mean_backcountry = mean(backcountry, na.rm = TRUE),
mean_rv = mean(rv_campers, na.rm = TRUE)
) %>%
pivot_longer(
cols = starts_with("mean_"),
names_to = "type_visit",
values_to = "mean",
names_prefix = "mean_"
) %>% ggplot(aes(x = region, y = mean)) + geom_col() + facet_grid(~type_visit) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

region_data %>%
filter(region == "u.s. territory") %>%
distinct(activities_name)
## # A tibble: 41 × 1
## activities_name
## <chr>
## 1 Boating
## 2 Sailing
## 3 Hiking
## 4 Paddling
## 5 Kayaking
## 6 SCUBA Diving
## 7 Snorkeling
## 8 Swimming
## 9 Saltwater Swimming
## 10 Wildlife Watching
## # ℹ 31 more rows
region_data %>%
filter(region == "no state data") %>%
distinct(activities_name)
## # A tibble: 0 × 1
## # ℹ 1 variable: activities_name <chr>
region_data %>%
group_by(region) %>%
summarize(parks = n_distinct(park_code)) %>% knitr::kable()
| region | parks |
|---|---|
| midwest | 46 |
| northeast | 68 |
| south | 117 |
| u.s. territory | 7 |
| west | 116 |
| NA | 122 |